LIBRARIES

options(timeout=180)
library(ggspavis)
library(openxlsx)
library(dplyr)
library(scran)
library(scater)
library(SpatialExperiment)
library(TENxVisiumData)
library(SpatialExperiment)
library(SpatialFeatureExperiment)
library(Voyager)
library(patchwork)
library(nnSVG)
library(here)

LOAD DATA

spe <- MouseBrainSagittalAnterior()
spe <- spe[, colData(spe)$sample_id == "MouseBrainSagittalAnterior1"]

PREPROCESSING

QC

Subset to keep only spots over tissue

spe <- spe[, int_colData(spe)$spatialData$in_tissue == TRUE]

Identify mitochondrial genes

is_mito <- grepl("(^MT-)|(^mt-)", rowData(spe)$symbol)
spe <- addPerCellQC(spe, subsets = list(mito = is_mito))

Select QC thresholds sum: Library size (represents the total sum of UMI counts per spot). detected: The number of expressed features (refers to the number of genes with non-zero UMI counts per spot). subsets_mito_percent: A high proportion of mitochondrial reads indicates cell damage.

To select the QC thresholds we first plot them and choose the thresholds visually.

hist(colData(spe)$sum, breaks = 20, col = "skyblue", border = "white", 
     xlab = "UMI Counts per Spot", ylab = "Frequency", 
     main = "Distribution of UMI Counts per Spot")

grid(col = "gray", lwd = 0.5)

hist(colData(spe)$detected, breaks = 20, col = "skyblue", border = "white", 
     xlab = "Number of genes with non-zero UMI counts per spot", ylab = "Frequency", 
     main = "Distribution of number of genes with non-zero UMI counts per spot")

grid(col = "gray", lwd = 0.5)

hist(colData(spe)$subsets_mito_percent, breaks = 20, col = "skyblue", border = "white", 
     xlab = "Percentage of mitochondrial reads", ylab = "Frequency", 
     main = "Distribution of percentage of mitochondrial reads")

grid(col = "gray", lwd = 0.5)

qc_lib_size <- colData(spe)$sum < 2000
qc_detected <- colData(spe)$detected < 1000
qc_mito <- colData(spe)$subsets_mito_percent > 30

Number of discarded spots for each metric

apply(cbind(qc_lib_size, qc_detected, qc_mito), 2, sum)
## qc_lib_size qc_detected     qc_mito 
##          26          23          14
discard <- qc_lib_size | qc_detected | qc_mito
table(discard)
## discard
## FALSE  TRUE 
##  2655    40
colData(spe)$discard <- discard

Plot discarded spots

# check spatial pattern of combined set of discarded spots
plotVisium(spe, x_coord = "pxl_row_in_fullres", y_coord = "pxl_col_in_fullres",
           fill = "discard")

Filter low-quality spots

spe <- spe[, !colData(spe)$discard]

NORMALIZATION

spe <- computeLibraryFactors(spe)
spe <- logNormCounts(spe)

FEATURE SELECTION

spe <- spe[!is_mito, ]

Now, let’s keep only highly variable genes.

# fit mean-variance relationship
dec <- modelGeneVar(spe, assay.type = "logcounts")
# select top HVGs
top_hvgs <- getTopHVGs(dec, prop = 0.9)
spe <- spe[rownames(spe) %in% top_hvgs,]
fit <- metadata(dec)
plot(fit$mean, fit$var, 
     xlab = "mean of log-expression", ylab = "variance of log-expression")
curve(fit$trend(x), col = "dodgerblue", add = TRUE, lwd = 2)

Anatomical regions

Non-spatial clustering

PCA

# compute PCA
set.seed(123)
spe <- runPCA(spe, subset_row = top_hvgs)

reducedDimNames(spe)
## [1] "PCA"
# graph-based clustering
set.seed(123)
k <- 28
g <- buildSNNGraph(spe, k = k, use.dimred = "PCA")
g_walk <- igraph::cluster_walktrap(g)
clus <- g_walk$membership
table(clus)
## clus
##   1   2   3   4   5   6   7   8   9 
## 424 210 320 581 476 314 129  78 123
colLabels(spe) <- factor(clus)
saveRDS(spe, file="spe.rds")

LOAD GENE SETS

Cellmarker data from http://bio-bigdata.hrbmu.edu.cn/CellMarker/CellMarker_download.html

cellmarkers <- read.xlsx('Cell_marker_Mouse.xlsx', rowNames =F)
cell_types <- cellmarkers %>% filter(tissue_class=='Brain' & cell_type=="Normal cell") %>%  select(cell_name)

Extract Gene Set info from the xlsx file

cell_types <- cellmarkers %>% filter(tissue_class=='Brain' & cell_type=="Normal cell") %>% dplyr::count(cell_name) %>% arrange(desc(n)) %>% filter(n>1) %>% select(cell_name)
cell_types <- cell_types$cell_name


# filter genes symbol info on the xlsx file
cm_sets <- lapply(cell_types, function(x) cellmarkers %>% 
                    filter(tissue_class=='Brain' & 
                             cell_type=="Normal cell" &
                             cell_name==x &
                             !(is.na(Symbol)))  %>% pull(Symbol) %>% unique())

cell_types <- unique(make.names(cell_types))
names(cm_sets) <- cell_types
set_names <- names(cm_sets)

Create list with geneset names from SpatialExperiment object

genesetlist <- lapply(set_names, function(x) {
  geneset <- rownames(rowData(spe)[rowData(spe)$symbol %in% cm_sets[[x]], ,drop=FALSE])
})

names(genesetlist) <- set_names
genesetlist <- genesetlist[sapply(genesetlist, function(x) length(x) > 1)]
saveRDS(genesetlist, file="RDS_files/genesets.rds")
# Function  for comparison between GSVA scores and clusters
source("gsva_cluster_tests.R")

Read files

# SpatialExperiment object with GSVA scores

res <- readRDS("RDS_files/gsva_res.rds")

Get spatially variable genesets

GSVA SpatialFeatureExperiment object

gsva_sfe <- toSpatialFeatureExperiment(res)
colGraph(gsva_sfe, "visium", sample_id = "MouseBrainSagittalAnterior1") <- findVisiumGraph(gsva_sfe, sample_id = "MouseBrainSagittalAnterior1", zero.policy = TRUE)

Compute Moran’s I for GSVA scores

assays(gsva_sfe)$logcounts <- assays(gsva_sfe)$es
moran_res <- runUnivariate(gsva_sfe, type = "moran", colGraphName = "visium")
rowData(moran_res)[order(rowData(moran_res)$moran_MouseBrainSagittalAnterior1,decreasing = TRUE),][1:10,2:3]
## DataFrame with 10 rows and 2 columns
##                                moran_MouseBrainSagittalAnterior1
##                                                        <numeric>
## Myelinating.stem.cell                                   0.843874
## Striatopallidal.loop.cell                               0.815366
## Vascular.leptomeningeal.cell                            0.814274
## Synaptic.cell                                           0.784577
## Myelinating.oligodendrocyte                             0.771354
## Early.GABAergic.neuron                                  0.760723
## D1.Medium.spiny.neuron.D1.MSN.                          0.756597
## Astrocyte                                               0.749458
## Satellite.glial.cell                                    0.748608
## Oligodendrocyte                                         0.721197
##                                K_MouseBrainSagittalAnterior1
##                                                    <numeric>
## Myelinating.stem.cell                                1.56658
## Striatopallidal.loop.cell                            4.16227
## Vascular.leptomeningeal.cell                         4.82437
## Synaptic.cell                                        5.09456
## Myelinating.oligodendrocyte                          4.28530
## Early.GABAergic.neuron                               3.38674
## D1.Medium.spiny.neuron.D1.MSN.                       6.13580
## Astrocyte                                            4.34787
## Satellite.glial.cell                                 4.28117
## Oligodendrocyte                                     10.92995

Plot Top 10 Spatially Autocorrelated Genesets

top_SVG_genesets <- rownames(rowData(moran_res)[order(rowData(moran_res)$moran_MouseBrainSagittalAnterior1,decreasing = TRUE),][1:10,])
plot_list_hist_gsva <- lapply(top_SVG_genesets, function(x){
  pl <- plotVisium(res,y_coord = "pxl_col_in_fullres", x_coord = "pxl_row_in_fullres", fill = x, assay = "es",sample_ids=NULL) + ggtitle(paste0(x," GSVA scores superposed in histological image" ))+ theme(text = element_text(size = 7))
  pl
})

Clusters plot

library(RColorBrewer)
cluster_plot <- plotVisium(spe,y_coord = "pxl_col_in_fullres", x_coord = "pxl_row_in_fullres", fill = "label", palette = brewer.pal(9, "Set1"))+labs(subtitle=NULL)+ggtitle("Cluster regions")+ theme(text = element_text(size = 7))
for (i in 1:length(plot_list_hist_gsva)){
    print(plot_list_hist_gsva[[i]]+cluster_plot)
}

Compute NNSVG for GSVA scores

spe_nnSVG <- readRDS("RDS_files/nnsvg_res.rds")
rowData(spe_nnSVG)[order(rowData(spe_nnSVG)$rank,decreasing = TRUE),][1:10,2:15]
## DataFrame with 10 rows and 14 columns
##                                      sigma.sq    tau.sq       phi    loglik
##                                     <numeric> <numeric> <numeric> <numeric>
## Disease.associated.microglial.cell 0.00477849 0.2838391   1.44804 -2114.817
## Neuron.glial.antigen.2.NG2..cell   0.00674967 0.1283583  13.16504 -1098.976
## Vascular.endothelial.cell          0.00933031 0.2186103   4.58197 -1792.096
## Hemogenic.endothelial.cell         0.02153829 0.1551948  37.53960 -1447.578
## Homeostatic.microglial.cell        0.02035685 0.2154609  19.84606 -1828.021
## Monocyte.derived.macrophage        0.00715999 0.0839015  11.17766  -559.162
## Plasmacytoid.dendritic.cell.pDC.   0.00613245 0.0923885   1.03317  -659.607
## Border.associated.macrophage       0.00537891 0.0614069   7.66768  -141.157
## Hippocampal.granule.precursor.cell 0.03256509 0.1749743  23.39356 -1628.272
## Venous.cell                        0.02253369 0.1891998   6.89010 -1651.130
##                                      runtime      mean       var     spcov
##                                    <numeric> <numeric> <numeric> <numeric>
## Disease.associated.microglial.cell     1.887        NA        NA        NA
## Neuron.glial.antigen.2.NG2..cell       1.411        NA        NA        NA
## Vascular.endothelial.cell              1.943        NA        NA        NA
## Hemogenic.endothelial.cell             2.881        NA        NA        NA
## Homeostatic.microglial.cell            1.881        NA        NA        NA
## Monocyte.derived.macrophage            1.781        NA        NA        NA
## Plasmacytoid.dendritic.cell.pDC.       0.426        NA        NA        NA
## Border.associated.macrophage           0.806        NA        NA        NA
## Hippocampal.granule.precursor.cell     3.129        NA        NA        NA
## Venous.cell                            1.820        NA        NA        NA
##                                      prop_sv loglik_lm   LR_stat      rank
##                                    <numeric> <numeric> <numeric> <numeric>
## Disease.associated.microglial.cell 0.0165565 -2117.623    5.6125       144
## Neuron.glial.antigen.2.NG2..cell   0.0499576 -1109.867   21.7828       143
## Vascular.endothelial.cell          0.0409331 -1804.122   24.0525       142
## Hemogenic.endothelial.cell         0.1218690 -1465.925   36.6936       141
## Homeostatic.microglial.cell        0.0863245 -1848.488   40.9340       140
## Monocyte.derived.macrophage        0.0786280  -584.042   49.7586       139
## Plasmacytoid.dendritic.cell.pDC.   0.0622451  -686.198   53.1835       138
## Border.associated.macrophage       0.0805398  -169.203   56.0925       137
## Hippocampal.granule.precursor.cell 0.1569104 -1673.804   91.0644       136
## Venous.cell                        0.1064248 -1698.438   94.6170       135
##                                           pval        padj
##                                      <numeric>   <numeric>
## Disease.associated.microglial.cell 6.04313e-02 6.04313e-02
## Neuron.glial.antigen.2.NG2..cell   1.86179e-05 1.87481e-05
## Vascular.endothelial.cell          5.98517e-06 6.06946e-06
## Hemogenic.endothelial.cell         1.07668e-08 1.09959e-08
## Homeostatic.microglial.cell        1.29211e-09 1.32903e-09
## Monocyte.derived.macrophage        1.56692e-11 1.62329e-11
## Plasmacytoid.dendritic.cell.pDC.   2.82718e-12 2.95010e-12
## Border.associated.macrophage       6.60139e-13 6.93868e-13
## Hippocampal.granule.precursor.cell 0.00000e+00 0.00000e+00
## Venous.cell                        0.00000e+00 0.00000e+00

Plot Top 10 Spatially Variable Genesets

top_nnSVG_genesets <- rownames(rowData(spe_nnSVG)[order(rowData(spe_nnSVG)$rank,decreasing = FALSE),][1:10,])

plot_list_hist_gsva_nnsvg <- lapply(top_nnSVG_genesets, function(x){
  pl <- list(x,plotVisium(res,y_coord = "pxl_col_in_fullres", x_coord = "pxl_row_in_fullres", fill = x, assay = "es",sample_ids=NULL) +  ggtitle(paste0(x," GSVA scores superposed in histological image" ))+ theme(text = element_text(size = 7)))
  pl
})
for (i in 1:length(plot_list_hist_gsva_nnsvg)){
  print(plot_list_hist_gsva_nnsvg[i][[1]][[2]]+cluster_plot)
  print(gsva_clusters_ttests(spe,res,plot_list_hist_gsva_nnsvg[i][[1]][[1]]))
}

## $clusters
## [1] 1 5 8
## 
## $t_statistics
##         t         t         t 
## -28.40401 -22.45167 -16.48003

## $clusters
## [1] 4 6
## 
## $t_statistics
##         t         t 
## -18.34119 -51.44519

## $clusters
## [1] 4
## 
## $t_statistics
## statistic.t 
##   -56.07121

## $clusters
## [1] 4
## 
## $t_statistics
##          t          t 
## -39.438710  -2.341196

## $clusters
## [1] 1 3 5 8
## 
## $t_statistics
##          t          t          t          t 
## -29.450621  -3.268205 -27.663425  -8.965769

## $clusters
## [1] 1 5 7 8 9
## 
## $t_statistics
##         t         t         t         t         t 
## -16.12438 -10.42099 -14.85694 -17.91070 -10.12684

## $clusters
## [1] 4 6
## 
## $t_statistics
##         t         t 
## -10.60505 -39.86971

## $clusters
## [1] 4 9
## 
## $t_statistics
##          t          t 
## -58.276244  -2.583948

## $clusters
## [1] 1 5 6 8
## 
## $t_statistics
##          t          t          t          t 
##  -6.938438 -17.410931  -9.773505  -3.636328

## $clusters
## [1] 4
## 
## $t_statistics
## statistic.t 
##   -54.09251

Correlation between clusters and genesets

most_corr<-most_cor_geneset(spe,res)
corr_plots <- lapply(1:length(unique(colData(spe)$label)), function(x){
  pl <- plotVisium(res,y_coord = "pxl_col_in_fullres", x_coord = "pxl_row_in_fullres", fill = most_corr[x], assay = "es",sample_ids=NULL) + labs(subtitle=NULL)+ggtitle(paste0(most_corr[x]," GSVA scores (most correlated with cluster ",x,")" ))+ theme(text = element_text(size = 7))
  })

for (i in 1:length(corr_plots)){
  print(corr_plots[[i]]+cluster_plot)
}